import os
import sys
import gzip
import shutil
import tempfile
import pysam

from Bio import SeqIO

# Command line: <script> LIBRARY TARGET.  Exactly two arguments must be given;
# any other count makes the unpacking below raise ValueError.
_cli_arguments = sys.argv[1:]
library, target = _cli_arguments


def create_alignment(info_line, psl_line, alignments):
    """Build the scored alignment record for one PSL line.

    Arguments:
     - info_line:  the matching tab-separated line of the 27-column info
                   file; only read when the module-level `target` is not
                   "genome" (the caller passes None for "genome").
     - psl_line:   one 21-column PSL alignment line.
     - alignments: iterator from which the next alignment record is taken
                   whenever the read is mapped.

    Returns a tuple (alignment, srcTName, srcTStart, srcTEnd); the last
    three items are None when `target` is "genome".

    The "AS" tag is set to
    matches - mismatches - query insert bases - target insert bases - clipped
    and set_tag is called with None for "MQ" and "MC" (in pysam, a None value
    removes the tag).

    Consistency between the PSL line, the info line and the alignment record
    is checked with assert statements.
    """
    psl = psl_line.split()
    assert len(psl) == 21

    # Every numeric column is converted, including the ones not used below,
    # so that a malformed column raises ValueError.
    (matches, mismatches, _rep_matches, n_count,
     _q_num_insert, q_base_insert,
     _t_num_insert, t_base_insert) = map(int, psl[:8])
    q_name = psl[9]
    q_size, q_start, q_end = map(int, psl[10:13])
    _t_size, _t_start, _t_end = map(int, psl[14:17])
    block_count = int(psl[17])
    # The three block columns are comma-separated lists with a trailing comma.
    block_sizes, q_starts, t_starts = (
        tuple(int(item) for item in column.rstrip(",").split(","))
        for column in psl[18:21]
    )
    assert len(block_sizes) == len(q_starts) == len(t_starts) == block_count

    clipped = q_start + q_size - q_end
    penalty = mismatches + q_base_insert + t_base_insert + clipped
    score = matches - penalty

    src_t_name = src_t_start = src_t_end = None
    if target == "genome":
        alignment = next(alignments)
        assert alignment.query_name == q_name
    else:
        info = info_line.rstrip("\n").split("\t")
        assert len(info) == 27
        src_q_name = info[0]
        src_q_start, src_q_end, src_q_size = map(int, info[1:4])
        src_t_name = info[4]
        src_t_start, src_t_end = map(int, info[5:7])
        src_aligned = int(info[8])
        mapping_q_name = info[9]
        mapped_q_name = info[17]
        # The info line must describe the same query as the PSL line.
        assert (src_q_name, src_q_size, src_q_start, src_q_end) == \
               (q_name, q_size, q_start, q_end)
        assert src_aligned == src_q_end - src_q_start - q_base_insert
        assert src_aligned == matches + mismatches + n_count
        if mapped_q_name:
            alignment = next(alignments)
            assert mapped_q_name == q_name
            assert alignment.query_name == q_name
        else:
            # No mapped query name: there is no record to take from
            # `alignments`, so create an unmapped placeholder.
            assert not mapping_q_name
            alignment = pysam.AlignedSegment()
            alignment.query_name = q_name
            alignment.is_unmapped = True

    for tag, value in (("AS", score), ("MQ", None), ("MC", None)):
        alignment.set_tag(tag, value)
    return alignment, src_t_name, src_t_start, src_t_end


def parse_alignments(library, target, alignments):
    """Yield the paired alignments described by <library>.<target>.psl.

    The PSL file is read two lines at a time (one line per mate).  Unless
    `target` is "genome", the file <library>.<target>.info is read in
    lockstep, one info line per PSL line.  Each line is turned into an
    alignment by create_alignment(), which also takes records from
    `alignments` as needed.

    Arguments:
     - library:    file name prefix of the input files.
     - target:     name of the alignment target; also stored in the "XT" tag
                   of the first mate.
     - alignments: iterator of alignment records, passed on to
                   create_alignment().

    Yields tuples (alignment1, alignment2).  On the first mate, the "XL" tag
    (end of mate 2 minus start of mate 1) is set for non-genome targets, and
    the "XR" tag (source target name) for the transcript-like targets.

    Raises AssertionError if the files are inconsistent, RuntimeError if the
    PSL or info file ends prematurely, and Exception if the info file has
    more lines than the PSL file.

    Both files are closed when the generator finishes, fails, or is closed.
    """
    header = """\
psLayout version 3

match	mis- 	rep. 	N's	Q gap	Q gap	T gap	T gap	strand	Q        	Q   	Q    	Q  	T        	T   	T    	T  	block	blockSizes 	qStarts	 tStarts
     	match	match	   	count	bases	count	bases	      	name     	size	start	end	name     	size	start	end	count
---------------------------------------------------------------------------------------------------------------------------------------------------------------
"""
    info_columns = [
        "srcQName", "srcQStart", "srcQEnd", "srcQSize",
        "srcTName", "srcTStart", "srcTEnd", "srcStrand", "srcAligned",
        "mappingQName", "mappingQStart", "mappingQEnd",
        "mappingTName", "mappingTStart", "mappingTEnd",
        "mappingStrand", "mappingId",
        "mappedQName", "mappedQStart", "mappedQEnd",
        "mappedTName", "mappedTStart", "mappedTEnd",
        "mappedStrand", "mappedAligned",
        "qStartTrunc", "qEndTrunc",
    ]
    filename = "%s.%s.psl" % (library, target)
    print("Reading %s" % filename)
    psl_lines = open(filename)
    info_lines = None
    # try/finally rather than relying on reaching the end of the function:
    # a failed check or an abandoned generator must not leak the file handles.
    try:
        # The PSL header consists of exactly five lines.
        header_lines = [next(psl_lines) for _ in range(5)]
        assert "".join(header_lines) == header
        if target != "genome":
            filename = "%s.%s.info" % (library, target)
            print("Reading %s" % filename)
            info_lines = open(filename)
            info_line = next(info_lines)
            assert info_line.startswith("#")
            assert info_line[1:].split() == info_columns
        for psl_line1 in psl_lines:
            # An explicit check gives a clear error; a bare next() here would
            # surface as "generator raised StopIteration".
            psl_line2 = next(psl_lines, None)
            if psl_line2 is None:
                raise RuntimeError("Odd number of alignment lines found in the psl file")
            if info_lines is None:
                info_line1 = None
                info_line2 = None
            else:
                info_line1 = next(info_lines, None)
                info_line2 = next(info_lines, None)
                if info_line1 is None or info_line2 is None:
                    raise RuntimeError("Too few lines found in the info file")
            alignment1, name1, start1, end1 = create_alignment(info_line1, psl_line1, alignments)
            alignment2, name2, start2, end2 = create_alignment(info_line2, psl_line2, alignments)
            assert name1 == name2
            alignment1.set_tag("XT", target)
            if alignment1.is_unmapped:
                # Freshly created unmapped records carry no pairing flags yet.
                assert alignment2.is_unmapped
                alignment1.is_read1 = True
                alignment1.is_read2 = False
                alignment2.is_read1 = False
                alignment2.is_read2 = True
                alignment1.is_paired = True
                alignment2.is_paired = True
            else:
                assert not alignment2.is_unmapped
                assert alignment1.is_reverse != alignment2.is_reverse
                assert alignment1.is_read1 is True
                assert alignment1.is_read2 is False
                assert alignment2.is_read1 is False
                assert alignment2.is_read2 is True
                assert alignment1.is_paired is True
                assert alignment2.is_paired is True
            if target != "genome":
                length = end2 - start1
                assert length > 0
                alignment1.set_tag("XL", length)
            if target in ("snRNA", "scRNA", "snoRNA", "scaRNA",
                          "mRNA", "lncRNA", "gencode", "fantomcat"):
                alignment1.set_tag("XR", name1)
            yield alignment1, alignment2
        if info_lines is not None:
            # The info file must be exhausted together with the PSL file.
            try:
                next(info_lines)
            except StopIteration:
                pass
            else:
                raise Exception("Additional lines found in the info file")
    finally:
        psl_lines.close()
        if info_lines is not None:
            info_lines.close()


# Rewrite <library>.<target>.bam: every record pair produced by
# parse_alignments() is written to a temporary BAM file, which is then moved
# over the original file.
filename = "{}.{}.bam".format(library, target)
print("Reading {}".format(filename))
alignments = pysam.AlignmentFile(filename)
# delete=False: the temporary file must still exist after it is closed,
# because it is moved to its final location below.
stream = tempfile.NamedTemporaryFile(delete=False)
print("Writing {}".format(stream.name))
output = pysam.AlignmentFile(stream, "wb", template=alignments)

for pair in parse_alignments(library, target, alignments):
    for alignment in pair:
        output.write(alignment)

# Close the input first, then the BAM writer, then the underlying stream.
for handle in (alignments, output, stream):
    handle.close()

print("Moving {} to {}".format(stream.name, filename))
shutil.move(stream.name, filename)
